library("here")
library("sjPlot")
library("tidyverse")
library("lme4")
library("viridis")
library("lmerTest")
library("ggplot2")
library("gridExtra")
library("gt")
library("ggthemes")

source(here("p4_analysis", "analysis_functions.R"))

p4path <- here("p4_analysis", "outputs")
p3path <- here("p3_methods", "outputs")

1. MMRR

1.1 Individual sampling

1.1.1 Summary plots

mmrr_ind <- format_mmrr(here(p3path, "mmrr_indsampling_results.csv"))

# overall error
summary_hplot(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_ind, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_ind, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(mmrr_ind, "ratio_err", divergent = TRUE)

summary_hplot(mmrr_ind, "comboenv_err", divergent = TRUE)

summary_hplot(mmrr_ind, "geo_err", divergent = TRUE)

1.1.2 Model summaries

run_lmer(mmrr_ind, "ratio_ae", filepath = here(p4path, "MMRR_individual_ratioerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - grid*** −0.0046 0.0016 −2.7960 0.02658997***
envgeo - rand −0.0009 0.0016 −0.5319 0.95132229
envgeo - trans** −0.0249 0.0016 −15.0697 0.0**
grid - rand 0.0037 0.0016 2.2641 0.10653142
grid - trans** −0.0202 0.0016 −12.2737 0.0**
rand - trans** −0.0240 0.0016 −14.5378 0.0**
*** p < 0.05
** p < 0.001
run_lmer(mmrr_ind, "geo_ae", filepath = here(p4path, "MMRR_individual_geoerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - grid*** −0.0021 0.0007 −3.1344 0.009333831***
envgeo - rand −0.0010 0.0007 −1.5145 0.428730954
envgeo - trans** −0.0223 0.0007 −33.5784 0.0**
grid - rand 0.0011 0.0007 1.6199 0.367294910
grid - trans** −0.0202 0.0007 −30.4440 0.0**
rand - trans** −0.0213 0.0007 −32.0639 0.0**
*** p < 0.01
** p < 0.001
run_lmer(mmrr_ind, "env_ae", filepath = here(p4path, "MMRR_individual_enverr.csv"))
Contrast Estimate SE Z ratio p
envgeo - grid −0.0005 0.0004 −1.2011 0.6261246
envgeo - rand 0.0005 0.0004 1.0594 0.7142981
envgeo - trans*** −0.0045 0.0004 −10.1643 3.7 × 10−14***
grid - rand 0.0010 0.0004 2.2605 0.1074219
grid - trans*** −0.0040 0.0004 −8.9632 2.7 × 10−14***
rand - trans*** −0.0050 0.0004 −11.2237 8.0 × 10−15***
*** p < 0.001

1.1.3 Megaplots

MEGAPLOT(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_ind, "ratio_err", colpal = "viridis", divergent = TRUE)

MEGAPLOT(mmrr_ind, "comboenv_err", divergent = TRUE)

MEGAPLOT(mmrr_ind, "geo_err", divergent = TRUE)

1.2 Site sampling

1.2.1 Summary plots

mmrr_site <- format_mmrr(here(p3path, "mmrr_sitesampling_results.csv"))

# overall error
summary_hplot(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_site, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_site, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(mmrr_site, "ratio_err", divergent = TRUE)

summary_hplot(mmrr_site, "comboenv_err", divergent = TRUE)

summary_hplot(mmrr_site, "geo_err", divergent = TRUE)

1.2.2 Model summaries

run_lmer(mmrr_site, "ratio_ae", filepath = here(p4path, "MMRR_site_ratioerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - equi*** −0.0340 0.0031 −10.8514 2.7 × 10−14***
envgeo - rand** −0.0101 0.0031 −3.2163 3.7 × 10−3**
equi - rand*** 0.0239 0.0031 7.6351 8.6 × 10−14***
*** p < 0.001
** p < 0.01
run_lmer(mmrr_site, "geo_ae", filepath = here(p4path, "MMRR_site_geoerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - equi*** 0.0098 0.0015 6.5604 1.6 × 10−10***
envgeo - rand 0.0018 0.0015 1.2065 0.4492773
equi - rand*** −0.0080 0.0015 −5.3539 2.6 × 10−7***
*** p < 0.001
run_lmer(mmrr_site, "env_ae", filepath = here(p4path, "MMRR_site_enverr.csv"))
Contrast Estimate SE Z ratio p
envgeo - equi*** −0.0116 0.0011 −10.4807 2.7 × 10−14***
envgeo - rand** −0.0035 0.0011 −3.1708 4.3 × 10−3**
equi - rand*** 0.0081 0.0011 7.3099 8.2 × 10−13***
*** p < 0.001
** p < 0.01

1.2.3 Megaplots

MEGAPLOT(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_site, "ratio_err", divergent = TRUE)

MEGAPLOT(mmrr_site, "comboenv_err", divergent = TRUE)

MEGAPLOT(mmrr_site, "geo_err", divergent = TRUE)

2. GDM

2.1 Individual sampling

2.1.1 Summary plots

gdm_ind <- format_gdm(here(p3path, "gdm_indsampling_results.csv"))

# overall error
summary_hplot(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_ind, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_ind, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(gdm_ind, "ratio_err", divergent = TRUE)

summary_hplot(gdm_ind, "comboenv_err", divergent = TRUE)

summary_hplot(gdm_ind, "geo_err", divergent = TRUE)

2.1.2 Model summaries

run_lmer(gdm_ind, "ratio_ae", filepath = here(p4path, "GDM_individual_ratioerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - grid*** −0.0135 0.0024 −5.7327 5.9 × 10−8***
envgeo - rand*** −0.0097 0.0024 −4.0895 2.5 × 10−4***
envgeo - trans*** −0.0198 0.0024 −8.3745 4.1 × 10−14***
grid - rand 0.0039 0.0024 1.6429 0.35443857
grid - trans** −0.0062 0.0024 −2.6417 0.04111055**
rand - trans*** −0.0101 0.0024 −4.2845 1.1 × 10−4***
*** p < 0.001
** p < 0.05
run_lmer(gdm_ind, "geo_ae", filepath = here(p4path, "GDM_individual_geoerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - grid*** 0.0899 0.0045 19.8550 0.0***
envgeo - rand*** 0.0204 0.0045 4.5115 3.8 × 10−5***
envgeo - trans*** 0.0493 0.0045 10.8910 4.0 × 10−14***
grid - rand*** −0.0695 0.0045 −15.3432 0.0***
grid - trans*** −0.0406 0.0045 −8.9659 2.7 × 10−14***
rand - trans*** 0.0289 0.0045 6.3789 1.1 × 10−9***
*** p < 0.001
run_lmer(gdm_ind, "env_ae", filepath = here(p4path, "GDM_individual_enverr.csv"))
Contrast Estimate SE Z ratio p
envgeo - grid −0.0026 0.0013 −2.0550 0.1681013
envgeo - rand*** −0.0046 0.0013 −3.6538 1.5 × 10−3***
envgeo - trans** −0.0106 0.0013 −8.3556 4.3 × 10−14**
grid - rand −0.0020 0.0013 −1.5990 0.3791273
grid - trans** −0.0080 0.0013 −6.3009 1.8 × 10−9**
rand - trans** −0.0060 0.0013 −4.7013 1.5 × 10−5**
*** p < 0.01
** p < 0.001

2.1.3 Megaplots

MEGAPLOT(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_ind, "ratio_err", divergent = TRUE)

MEGAPLOT(gdm_ind, "comboenv_err", divergent = TRUE)

MEGAPLOT(gdm_ind, "geo_err", divergent = TRUE)

2.1.4 Prop NA

Confirm that the distribution of NAs is as expected and the proportions are small

MEGAPLOT(gdm_ind, "geo_coeff", aggfunc = "prop_na", colpal = "mako")

2.2 Site sampling

2.2.1 Summary plots

gdm_site <- format_gdm(here(p3path, "gdm_sitesampling_results.csv"))

# overall error
summary_hplot(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_site, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_site, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(gdm_site, "ratio_err", divergent = TRUE)

summary_hplot(gdm_site, "comboenv_err", divergent = TRUE)

summary_hplot(gdm_site, "geo_err", divergent = TRUE)

2.2.2 Model summaries

run_lmer(gdm_site, "ratio_ae", filepath = here(p4path, "GDM_site_ratioerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - equi*** −0.0760 0.0044 −17.3557 0.0***
envgeo - rand*** −0.0272 0.0044 −6.1624 2.1 × 10−9***
equi - rand*** 0.0488 0.0044 11.1986 7.3 × 10−15***
*** p < 0.001
run_lmer(gdm_site, "geo_ae", filepath = here(p4path, "GDM_site_geoerr.csv"))
Contrast Estimate SE Z ratio p
envgeo - equi*** 0.2882 0.0134 21.4290 0.0***
envgeo - rand 0.0229 0.0136 1.6897 0.2091145
equi - rand*** −0.2652 0.0134 −19.8333 0.0***
*** p < 0.001
run_lmer(gdm_site, "env_ae", filepath = here(p4path, "GDM_site_enverr.csv"))
Contrast Estimate SE Z ratio p
envgeo - equi*** −0.0307 0.0034 −8.9621 2.1 × 10−14***
envgeo - rand*** −0.0267 0.0035 −7.7294 5.0 × 10−14***
equi - rand 0.0040 0.0034 1.1684 0.4721507
*** p < 0.001

2.2.3 Megaplots

MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_err", divergent = TRUE)

MEGAPLOT(gdm_site, "comboenv_err", divergent = TRUE)

MEGAPLOT(gdm_site, "geo_err", divergent = TRUE)

2.2.4 Prop NA

Confirm that the distribution of NAs is as expected and the proportions are small

MEGAPLOT(gdm_site, "geo_coeff", aggfunc = "prop_na", colpal = "mako")

3. Comparison of error distirbutions for MMRR and GDM

mmrr_ind %>%
  filter(sampstrat != "full") %>%
  ggplot(aes(x = ratio_ae, fill = m, colour = m)) +
  geom_density(alpha = 0.5) +
  theme_few() +
  scale_fill_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako") +
  scale_color_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako")

gdm_ind %>%
  filter(sampstrat != "full") %>%
  ggplot(aes(x = ratio_ae, fill = m, colour = m)) +
  geom_density(alpha = 0.5) +
  theme_few() +
  scale_fill_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako") +
  scale_color_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako")
## Warning: Removed 5 rows containing non-finite values (stat_density).

4. Compare results of MMRR and GDM

plot(mmrr_ind$geo_coeff, gdm_ind$geo_coeff, col = gdm_ind$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)

plot(mmrr_ind$comboenv_coeff, gdm_ind$comboenv_coeff)

df <- data.frame(mmrr_ind, 
                 geo_mg = gdm_ind$geo_coeff/mmrr_ind$geo_coeff, 
                 env_mg = gdm_ind$comboenv_coeff/mmrr_ind$comboenv_coeff,
                 ratio_mg = gdm_ind$ratio/mmrr_ind$ratio )
summary_hplot(df, "geo_mg")

summary_hplot(df, "env_mg")

summary_hplot(df, "ratio_mg")

MEGAPLOT(df, "geo_mg")

MEGAPLOT(df, "env_mg")

MEGAPLOT(df, "ratio_mg")

plot(mmrr_site$geo_coeff, gdm_site$geo_coeff, col = gdm_site$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)

plot(mmrr_site$comboenv_coeff, gdm_site$comboenv_coeff)

df <- data.frame(mmrr_site, 
                 geo_mg = gdm_site$geo_coeff/mmrr_site$geo_coeff, 
                 env_mg = gdm_site$comboenv_coeff/mmrr_site$comboenv_coeff,
                 ratio_mg = gdm_site$ratio/mmrr_site$ratio )
summary_hplot(df, "geo_mg")

summary_hplot(df, "env_mg")

summary_hplot(df, "ratio_mg")